discount: 0.998, experiment_type: cl_gan, gae_lambda: 0.995, gan_discriminator_iters: 200, gan_discriminator_layers: [128, 128], gan_generator_iters: 5, gan_generator_layers: [256, 256], gan_noise_size: 4, gan_outer_iters: 5, goal_noise_level: 1, goal_range: 10, goal_size: 2, horizon: 400, improvement_threshold: 10, inner_iters: 5, max_reward: 6000, min_reward: 5, num_new_goals: 200, num_old_goals: 200, outer_iters: 200, pg_batch_size: 20000,
|
policy performance initialization |
GAN initialization |
|
policy performance itr: 0 mean_rewards: 317.6282681693553 coverage: 0.0 |
goals itr: 0 |
|
policy performance itr: 1 mean_rewards: 144.09843852714206 coverage: 0.0 |
goals itr: 1 |
|
policy performance itr: 2 mean_rewards: 297.0048192822292 coverage: 0.0 |
goals itr: 2 |
|
policy performance itr: 3 mean_rewards: 221.66190618889058 coverage: 0.0 |
goals itr: 3 |
|
policy performance itr: 4 mean_rewards: 306.12285907024227 coverage: 0.0 |
goals itr: 4 |
|
policy performance itr: 5 mean_rewards: 123.84055889039033 coverage: 0.0 |
goals itr: 5 |
|
policy performance itr: 6 mean_rewards: 122.9344809211749 coverage: 0.0 |
goals itr: 6 |
|
policy performance itr: 7 mean_rewards: 0.0 coverage: 0.0 |
goals itr: 7 |
|
policy performance itr: 8 mean_rewards: 130.9269206534292 coverage: 0.0 |
goals itr: 8 |
|
policy performance itr: 9 mean_rewards: 9.22127636280848 coverage: 0.0 |
goals itr: 9 |
|
policy performance itr: 10 mean_rewards: 518.0453850701153 coverage: 0.03571428571428571 |
goals itr: 10 |
|
policy performance itr: 11 mean_rewards: 811.2408372791235 coverage: 0.03571428571428571 |
goals itr: 11 |
|
policy performance itr: 12 mean_rewards: 2460.2889100348425 coverage: 0.07142857142857142 |
goals itr: 12 |
|
policy performance itr: 13 mean_rewards: 2850.3386681215457 coverage: 0.14285714285714285 |
goals itr: 13 |
|
policy performance itr: 14 mean_rewards: 3540.320388578374 coverage: 0.14285714285714285 |
goals itr: 14 |
|
policy performance itr: 15 mean_rewards: 4643.917133000976 coverage: 0.14285714285714285 |
goals itr: 15 |
|
policy performance itr: 16 mean_rewards: 7386.4515782127955 coverage: 0.25 |
goals itr: 16 |
|
policy performance itr: 17 mean_rewards: 11018.768506886812 coverage: 0.32142857142857145 |
goals itr: 17 |
|
policy performance itr: 18 mean_rewards: 14755.162282534395 coverage: 0.32142857142857145 |
goals itr: 18 |
|
policy performance itr: 19 mean_rewards: 15979.145935662153 coverage: 0.39285714285714285 |
goals itr: 19 |
|
policy performance itr: 20 mean_rewards: 20723.669847448266 coverage: 0.4642857142857143 |
goals itr: 20 |
|
policy performance itr: 21 mean_rewards: 21065.95484139933 coverage: 0.42857142857142855 |
goals itr: 21 |
|
policy performance itr: 22 mean_rewards: 22237.117374048034 coverage: 0.4642857142857143 |
goals itr: 22 |
|
policy performance itr: 23 mean_rewards: 20809.124929073754 coverage: 0.4642857142857143 |
goals itr: 23 |
|
policy performance itr: 24 mean_rewards: 21392.058174751382 coverage: 0.4642857142857143 |
goals itr: 24 |
|
policy performance itr: 25 mean_rewards: 26311.931154347956 coverage: 0.5 |
goals itr: 25 |
|
policy performance itr: 26 mean_rewards: 25900.101465997697 coverage: 0.5 |
goals itr: 26 |
|
policy performance itr: 27 mean_rewards: 26269.310126031 coverage: 0.5 |
goals itr: 27 |
|
policy performance itr: 28 mean_rewards: 25191.97422438762 coverage: 0.5 |
goals itr: 28 |
|
policy performance itr: 29 mean_rewards: 24526.468484229 coverage: 0.5 |
goals itr: 29 |
|
policy performance itr: 30 mean_rewards: 22820.506994132364 coverage: 0.5 |
goals itr: 30 |
|
policy performance itr: 31 mean_rewards: 24055.794369586954 coverage: 0.5 |
goals itr: 31 |
|
policy performance itr: 32 mean_rewards: 22791.82076521832 coverage: 0.5 |
goals itr: 32 |
|
policy performance itr: 33 mean_rewards: 20401.46969829133 coverage: 0.5 |
goals itr: 33 |
|
policy performance itr: 34 mean_rewards: 18925.089721026616 coverage: 0.5 |
goals itr: 34 |
|
policy performance itr: 35 mean_rewards: 15576.816014114742 coverage: 0.4642857142857143 |
goals itr: 35 |
|
policy performance itr: 36 mean_rewards: 24387.903978402202 coverage: 0.5 |
goals itr: 36 |
|
policy performance itr: 37 mean_rewards: 27338.371253341098 coverage: 0.6071428571428571 |
goals itr: 37 |
|
policy performance itr: 38 mean_rewards: 27084.78955211176 coverage: 0.5714285714285714 |
goals itr: 38 |
|
policy performance itr: 39 mean_rewards: 25728.22193911046 coverage: 0.6071428571428571 |
goals itr: 39 |
|
policy performance itr: 40 mean_rewards: 22233.836358499415 coverage: 0.6071428571428571 |
goals itr: 40 |
|
policy performance itr: 41 mean_rewards: 26705.710070140067 coverage: 0.5714285714285714 |
goals itr: 41 |
|
policy performance itr: 42 mean_rewards: 29891.33443962126 coverage: 0.6428571428571429 |
goals itr: 42 |
|
policy performance itr: 43 mean_rewards: 33825.58622161723 coverage: 0.6428571428571429 |
goals itr: 43 |
|
policy performance itr: 44 mean_rewards: 33866.80460171369 coverage: 0.6428571428571429 |
goals itr: 44 |
|
policy performance itr: 45 mean_rewards: 32940.37191476087 coverage: 0.6071428571428571 |
goals itr: 45 |
|
policy performance itr: 46 mean_rewards: 37197.73551119472 coverage: 0.6785714285714286 |
goals itr: 46 |
|
policy performance itr: 47 mean_rewards: 34024.941089258595 coverage: 0.6428571428571429 |
goals itr: 47 |
|
policy performance itr: 48 mean_rewards: 34473.891803244864 coverage: 0.6428571428571429 |
goals itr: 48 |
|
policy performance itr: 49 mean_rewards: 36146.21681766318 coverage: 0.6785714285714286 |
goals itr: 49 |
|
policy performance itr: 50 mean_rewards: 30295.59107309598 coverage: 0.5714285714285714 |
goals itr: 50 |
|
policy performance itr: 51 mean_rewards: 36914.87887482816 coverage: 0.7142857142857143 |
goals itr: 51 |
|
policy performance itr: 52 mean_rewards: 31373.982177326467 coverage: 0.5714285714285714 |
goals itr: 52 |
|
policy performance itr: 53 mean_rewards: 36212.72003850961 coverage: 0.6785714285714286 |
goals itr: 53 |
|
policy performance itr: 54 mean_rewards: 39713.75610022594 coverage: 0.7142857142857143 |
goals itr: 54 |
|
policy performance itr: 55 mean_rewards: 32853.191445866694 coverage: 0.75 |
goals itr: 55 |
|
policy performance itr: 56 mean_rewards: 34668.44348204018 coverage: 0.75 |
goals itr: 56 |
|
policy performance itr: 57 mean_rewards: 37529.02293702738 coverage: 0.7142857142857143 |
goals itr: 57 |
|
policy performance itr: 58 mean_rewards: 37649.80634547916 coverage: 0.7142857142857143 |
goals itr: 58 |
|
policy performance itr: 59 mean_rewards: 40595.821454197714 coverage: 0.7142857142857143 |
goals itr: 59 |
|
policy performance itr: 60 mean_rewards: 41389.443801209905 coverage: 0.7142857142857143 |
goals itr: 60 |
|
policy performance itr: 61 mean_rewards: 43052.16525688678 coverage: 0.7142857142857143 |
goals itr: 61 |
|
policy performance itr: 62 mean_rewards: 43536.98581313544 coverage: 0.75 |
goals itr: 62 |
|
policy performance itr: 63 mean_rewards: 43656.57009651979 coverage: 0.75 |
goals itr: 63 |
|
policy performance itr: 64 mean_rewards: 36037.08505853296 coverage: 0.7142857142857143 |
goals itr: 64 |
|
policy performance itr: 65 mean_rewards: 40202.62557295223 coverage: 0.75 |
goals itr: 65 |
|
policy performance itr: 66 mean_rewards: 42471.804453671444 coverage: 0.75 |
goals itr: 66 |
|
policy performance itr: 67 mean_rewards: 41507.25400242167 coverage: 0.75 |
goals itr: 67 |
|
policy performance itr: 68 mean_rewards: 40576.379693833354 coverage: 0.7857142857142857 |
goals itr: 68 |
|
policy performance itr: 69 mean_rewards: 36528.792425564236 coverage: 0.7142857142857143 |
goals itr: 69 |
|
policy performance itr: 70 mean_rewards: 34220.26684114845 coverage: 0.75 |
goals itr: 70 |
|
policy performance itr: 71 mean_rewards: 36853.57425615199 coverage: 0.75 |
goals itr: 71 |
|
policy performance itr: 72 mean_rewards: 38836.16008616784 coverage: 0.7142857142857143 |
goals itr: 72 |
|
policy performance itr: 73 mean_rewards: 36372.90466507651 coverage: 0.75 |
goals itr: 73 |
|
policy performance itr: 74 mean_rewards: 37674.76217173436 coverage: 0.7142857142857143 |
goals itr: 74 |
|
policy performance itr: 75 mean_rewards: 43413.2008296124 coverage: 0.7857142857142857 |
goals itr: 75 |
|
policy performance itr: 76 mean_rewards: 44051.87365600521 coverage: 0.7857142857142857 |
goals itr: 76 |
|
policy performance itr: 77 mean_rewards: 43535.58069048143 coverage: 0.7857142857142857 |
goals itr: 77 |
|
policy performance itr: 78 mean_rewards: 41816.09446469007 coverage: 0.8214285714285714 |
goals itr: 78 |
|
policy performance itr: 79 mean_rewards: 38965.209275713074 coverage: 0.7857142857142857 |
goals itr: 79 |
|
policy performance itr: 80 mean_rewards: 42928.95189817582 coverage: 0.7857142857142857 |
goals itr: 80 |
|
policy performance itr: 81 mean_rewards: 44008.58962221031 coverage: 0.8214285714285714 |
goals itr: 81 |
|
policy performance itr: 82 mean_rewards: 44463.96656725241 coverage: 0.8214285714285714 |
goals itr: 82 |
|
policy performance itr: 83 mean_rewards: 45785.163818997 coverage: 0.8214285714285714 |
goals itr: 83 |
|
policy performance itr: 84 mean_rewards: 43749.101536112874 coverage: 0.8571428571428571 |
goals itr: 84 |
|
policy performance itr: 85 mean_rewards: 45134.018175242825 coverage: 0.7857142857142857 |
goals itr: 85 |
|
policy performance itr: 86 mean_rewards: 42983.84838840996 coverage: 0.8214285714285714 |
goals itr: 86 |
|
policy performance itr: 87 mean_rewards: 46946.61488317984 coverage: 0.8571428571428571 |
goals itr: 87 |
|
policy performance itr: 88 mean_rewards: 44346.37791660067 coverage: 0.8214285714285714 |
goals itr: 88 |
|
policy performance itr: 89 mean_rewards: 49187.4475599416 coverage: 0.8571428571428571 |
goals itr: 89 |
|
policy performance itr: 90 mean_rewards: 47473.25334989625 coverage: 0.8928571428571429 |
goals itr: 90 |
|
policy performance itr: 91 mean_rewards: 46977.80349482036 coverage: 0.7857142857142857 |
goals itr: 91 |
|
policy performance itr: 92 mean_rewards: 49976.84885059171 coverage: 0.7857142857142857 |
goals itr: 92 |
|
policy performance itr: 93 mean_rewards: 44268.44722528929 coverage: 0.7857142857142857 |
goals itr: 93 |
|
policy performance itr: 94 mean_rewards: 36959.07533293619 coverage: 0.7142857142857143 |
goals itr: 94 |
|
policy performance itr: 95 mean_rewards: 36831.65126897704 coverage: 0.7857142857142857 |
goals itr: 95 |
|
policy performance itr: 96 mean_rewards: 38621.593153854206 coverage: 0.8214285714285714 |
goals itr: 96 |
|
policy performance itr: 97 mean_rewards: 44393.97042431219 coverage: 0.8928571428571429 |
goals itr: 97 |
|
policy performance itr: 98 mean_rewards: 45035.97696184608 coverage: 0.8928571428571429 |
goals itr: 98 |
|
policy performance itr: 99 mean_rewards: 43345.489262793104 coverage: 0.8928571428571429 |
goals itr: 99 |
|
policy performance itr: 100 mean_rewards: 45359.184498691 coverage: 0.8928571428571429 |
goals itr: 100 |
|
policy performance itr: 101 mean_rewards: 38474.473059560056 coverage: 0.8571428571428571 |
goals itr: 101 |
|
policy performance itr: 102 mean_rewards: 39440.64468498287 coverage: 0.8571428571428571 |
goals itr: 102 |
|
policy performance itr: 103 mean_rewards: 42780.29954312329 coverage: 0.8571428571428571 |
goals itr: 103 |
|
policy performance itr: 104 mean_rewards: 39129.95689834056 coverage: 0.7857142857142857 |
goals itr: 104 |
|
policy performance itr: 105 mean_rewards: 40697.8735573621 coverage: 0.75 |
goals itr: 105 |
|
policy performance itr: 106 mean_rewards: 48794.60097103174 coverage: 0.8571428571428571 |
goals itr: 106 |
|
policy performance itr: 107 mean_rewards: 45469.56252309343 coverage: 0.8928571428571429 |
goals itr: 107 |
|
policy performance itr: 108 mean_rewards: 44914.33283606776 coverage: 0.9642857142857143 |
goals itr: 108 |
|
policy performance itr: 109 mean_rewards: 44763.2847919715 coverage: 0.9285714285714286 |
goals itr: 109 |
|
policy performance itr: 110 mean_rewards: 45908.75351532814 coverage: 0.9642857142857143 |
goals itr: 110 |
|
policy performance itr: 111 mean_rewards: 45205.85674235568 coverage: 0.9285714285714286 |
goals itr: 111 |
|
policy performance itr: 112 mean_rewards: 45212.400451262496 coverage: 1.0 |
goals itr: 112 |
|
policy performance itr: 113 mean_rewards: 44996.461055507134 coverage: 1.0 |
goals itr: 113 |
|
policy performance itr: 114 mean_rewards: 46433.81268226764 coverage: 1.0 |
goals itr: 114 |
|
policy performance itr: 115 mean_rewards: 45815.906791898284 coverage: 1.0 |
goals itr: 115 |
|
policy performance itr: 116 mean_rewards: 48145.430909436436 coverage: 0.9285714285714286 |
goals itr: 116 |
|
policy performance itr: 117 mean_rewards: 50130.78030549126 coverage: 0.9642857142857143 |
goals itr: 117 |
|
policy performance itr: 118 mean_rewards: 46962.41317366064 coverage: 0.8928571428571429 |
goals itr: 118 |
|
policy performance itr: 119 mean_rewards: 42444.31948352798 coverage: 0.9642857142857143 |
goals itr: 119 |
|
policy performance itr: 120 mean_rewards: 38845.138658252974 coverage: 0.8928571428571429 |
goals itr: 120 |
|
policy performance itr: 121 mean_rewards: 41857.968862611226 coverage: 0.9285714285714286 |
goals itr: 121 |
|
policy performance itr: 122 mean_rewards: 43238.531198465505 coverage: 0.9642857142857143 |
goals itr: 122 |
|
policy performance itr: 123 mean_rewards: 44765.9231464923 coverage: 0.9285714285714286 |
goals itr: 123 |
|
policy performance itr: 124 mean_rewards: 45264.70313087761 coverage: 0.9642857142857143 |
goals itr: 124 |
|
policy performance itr: 125 mean_rewards: 50186.72334817041 coverage: 1.0 |
goals itr: 125 |
|
policy performance itr: 126 mean_rewards: 52944.16304654278 coverage: 1.0 |
goals itr: 126 |
|
policy performance itr: 127 mean_rewards: 45585.34301410841 coverage: 0.9285714285714286 |
goals itr: 127 |
|
policy performance itr: 128 mean_rewards: 46011.352468689925 coverage: 0.9285714285714286 |
goals itr: 128 |
|
policy performance itr: 129 mean_rewards: 51144.01523776182 coverage: 0.9642857142857143 |
goals itr: 129 |
|
policy performance itr: 130 mean_rewards: 47685.411936268 coverage: 0.9285714285714286 |
goals itr: 130 |
|
policy performance itr: 131 mean_rewards: 51337.42137953637 coverage: 0.9642857142857143 |
goals itr: 131 |
|
policy performance itr: 132 mean_rewards: 52599.948799525286 coverage: 0.9642857142857143 |
goals itr: 132 |
|
policy performance itr: 133 mean_rewards: 51170.12660763504 coverage: 1.0 |
goals itr: 133 |
|
policy performance itr: 134 mean_rewards: 50433.29140284979 coverage: 0.9642857142857143 |
goals itr: 134 |
|
policy performance itr: 135 mean_rewards: 52942.59685362397 coverage: 0.9642857142857143 |
goals itr: 135 |
|
policy performance itr: 136 mean_rewards: 52320.25874468626 coverage: 0.9642857142857143 |
goals itr: 136 |
|
policy performance itr: 137 mean_rewards: 54888.33458564347 coverage: 0.9642857142857143 |
goals itr: 137 |
|
policy performance itr: 138 mean_rewards: 53316.10146631748 coverage: 0.9642857142857143 |
goals itr: 138 |
|
policy performance itr: 139 mean_rewards: 52872.4137030247 coverage: 0.9642857142857143 |
goals itr: 139 |
|
policy performance itr: 140 mean_rewards: 53378.85133661357 coverage: 0.9285714285714286 |
goals itr: 140 |
|
policy performance itr: 141 mean_rewards: 52057.003179671316 coverage: 0.9642857142857143 |
goals itr: 141 |
|
policy performance itr: 142 mean_rewards: 54181.97933036378 coverage: 0.9642857142857143 |
goals itr: 142 |
|
policy performance itr: 143 mean_rewards: 53750.66139462999 coverage: 0.9285714285714286 |
goals itr: 143 |
|
policy performance itr: 144 mean_rewards: 56166.347578222354 coverage: 0.9285714285714286 |
goals itr: 144 |
|
policy performance itr: 145 mean_rewards: 55879.63026927819 coverage: 0.9642857142857143 |
goals itr: 145 |
|
policy performance itr: 146 mean_rewards: 50992.25989908176 coverage: 0.9285714285714286 |
goals itr: 146 |
|
policy performance itr: 147 mean_rewards: 57695.29575509696 coverage: 1.0 |
goals itr: 147 |
|
policy performance itr: 148 mean_rewards: 53364.11068813497 coverage: 0.9285714285714286 |
goals itr: 148 |
|
policy performance itr: 149 mean_rewards: 54704.19547435221 coverage: 0.9642857142857143 |
goals itr: 149 |
|
policy performance itr: 150 mean_rewards: 54459.25940765365 coverage: 0.9642857142857143 |
goals itr: 150 |
|
policy performance itr: 151 mean_rewards: 53904.478854689 coverage: 0.9642857142857143 |
goals itr: 151 |
|
policy performance itr: 152 mean_rewards: 53436.263655461815 coverage: 0.9642857142857143 |
goals itr: 152 |
|
policy performance itr: 153 mean_rewards: 51950.24164651926 coverage: 0.9642857142857143 |
goals itr: 153 |
|
policy performance itr: 154 mean_rewards: 52640.67663150733 coverage: 1.0 |
goals itr: 154 |
|
policy performance itr: 155 mean_rewards: 57361.90353073693 coverage: 0.9642857142857143 |
goals itr: 155 |
|
policy performance itr: 156 mean_rewards: 54257.907723296485 coverage: 0.9642857142857143 |
goals itr: 156 |
|
policy performance itr: 157 mean_rewards: 52881.36028312678 coverage: 0.9642857142857143 |
goals itr: 157 |
|
policy performance itr: 158 mean_rewards: 53172.51706052958 coverage: 1.0 |
goals itr: 158 |
|
policy performance itr: 159 mean_rewards: 54414.83033744262 coverage: 0.9642857142857143 |
goals itr: 159 |
|
policy performance itr: 160 mean_rewards: 57675.755715790845 coverage: 0.9285714285714286 |
goals itr: 160 |
|
policy performance itr: 161 mean_rewards: 60945.31354830752 coverage: 1.0 |
goals itr: 161 |
|
policy performance itr: 162 mean_rewards: 60589.76660380872 coverage: 1.0 |
goals itr: 162 |
|
policy performance itr: 163 mean_rewards: 54308.00602667169 coverage: 0.9285714285714286 |
goals itr: 163 |
|
policy performance itr: 164 mean_rewards: 59184.32698100521 coverage: 1.0 |
goals itr: 164 |
|
policy performance itr: 165 mean_rewards: 56967.02992599882 coverage: 1.0 |
goals itr: 165 |
|
policy performance itr: 166 mean_rewards: 56189.06993381336 coverage: 0.9642857142857143 |
goals itr: 166 |
|
policy performance itr: 167 mean_rewards: 58381.43043329129 coverage: 0.9642857142857143 |
goals itr: 167 |
|
policy performance itr: 168 mean_rewards: 53301.402062475114 coverage: 0.9642857142857143 |
goals itr: 168 |
|
policy performance itr: 169 mean_rewards: 52473.860512946274 coverage: 0.9642857142857143 |
goals itr: 169 |
|
policy performance itr: 170 mean_rewards: 57539.84342293192 coverage: 1.0 |
goals itr: 170 |
|
policy performance itr: 171 mean_rewards: 56371.03662575804 coverage: 1.0 |
goals itr: 171 |
|
policy performance itr: 172 mean_rewards: 52251.91684752482 coverage: 1.0 |
goals itr: 172 |
|
policy performance itr: 173 mean_rewards: 54534.219070195024 coverage: 1.0 |
goals itr: 173 |
|
policy performance itr: 174 mean_rewards: 54517.31993463288 coverage: 1.0 |
goals itr: 174 |
|
policy performance itr: 175 mean_rewards: 49836.31205245078 coverage: 1.0 |
goals itr: 175 |
|
policy performance itr: 176 mean_rewards: 50807.52171710757 coverage: 1.0 |
goals itr: 176 |
|
policy performance itr: 177 mean_rewards: 52534.15462465972 coverage: 1.0 |
goals itr: 177 |
|
policy performance itr: 178 mean_rewards: 52254.741105462606 coverage: 1.0 |
goals itr: 178 |
|
policy performance itr: 179 mean_rewards: 52222.534781989074 coverage: 1.0 |
goals itr: 179 |
|
policy performance itr: 180 mean_rewards: 50160.34955887222 coverage: 1.0 |
goals itr: 180 |
|
policy performance itr: 181 mean_rewards: 53455.00985368878 coverage: 1.0 |
goals itr: 181 |
|
policy performance itr: 182 mean_rewards: 53781.2875739051 coverage: 0.9642857142857143 |
goals itr: 182 |
|
policy performance itr: 183 mean_rewards: 56023.12036076695 coverage: 1.0 |
goals itr: 183 |
|
policy performance itr: 184 mean_rewards: 58241.93061183455 coverage: 1.0 |
goals itr: 184 |
|
policy performance itr: 185 mean_rewards: 56674.31932213842 coverage: 0.9642857142857143 |
goals itr: 185 |
|
policy performance itr: 186 mean_rewards: 59643.19944291938 coverage: 1.0 |
goals itr: 186 |
|
policy performance itr: 187 mean_rewards: 55073.2731388868 coverage: 1.0 |
goals itr: 187 |
|
policy performance itr: 188 mean_rewards: 57787.5348627125 coverage: 1.0 |
goals itr: 188 |
|
policy performance itr: 189 mean_rewards: 57835.49816021267 coverage: 1.0 |
goals itr: 189 |
|
policy performance itr: 190 mean_rewards: 56673.18046536997 coverage: 0.9642857142857143 |
goals itr: 190 |
|
policy performance itr: 191 mean_rewards: 56354.916749644835 coverage: 0.9642857142857143 |
goals itr: 191 |
|
policy performance itr: 192 mean_rewards: 56132.825687588076 coverage: 1.0 |
goals itr: 192 |
|
policy performance itr: 193 mean_rewards: 54226.025494723675 coverage: 1.0 |
goals itr: 193 |
|
policy performance itr: 194 mean_rewards: 54049.84256412597 coverage: 0.9642857142857143 |
goals itr: 194 |
|
policy performance itr: 195 mean_rewards: 54161.24521664972 coverage: 1.0 |
goals itr: 195 |
|
policy performance itr: 196 mean_rewards: 56542.27768019703 coverage: 1.0 |
goals itr: 196 |
|
policy performance itr: 197 mean_rewards: 54500.871103814476 coverage: 0.9642857142857143 |
goals itr: 197 |
|
policy performance itr: 198 mean_rewards: 59499.59330779978 coverage: 1.0 |
goals itr: 198 |
|
policy performance itr: 199 mean_rewards: 55146.11338635617 coverage: 1.0 |
goals itr: 199 |
|
Mean rewards |
Coverages |